package com.example.test.主方法运行测试;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import sun.net.www.content.image.png;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Small command-line scraper: downloads a web page with jsoup and prints the
 * text of its longer link elements. Also exposes {@link #getImg(String)}, a
 * regex helper that pulls {@code src="..."} attributes out of an HTML fragment.
 */
public class GetHtml {

    /** Page downloaded by {@link #main(String[])}. */
    private static final String PAGE_URL = "https://www.163.com/";

    /** Only link texts strictly longer than this many characters are printed. */
    private static final int TEXT_LENGTH_THRESHOLD = 10;

    /**
     * Matches a double-quoted {@code src} attribute, e.g. {@code src="//host/a.png"}.
     * Compiled once because {@link #getImg(String)} may be called many times.
     * The pattern is not anchored to an attribute boundary, so it also matches
     * attributes whose name merely ends in {@code src} (such as {@code data-src}).
     */
    private static final Pattern SRC_ATTRIBUTE = Pattern.compile("src=\"(.*?)\"", Pattern.DOTALL);

    /**
     * Fetches {@link #PAGE_URL}, selects every element matching the CSS query
     * {@code [href]} and prints the text of each one whose text is longer than
     * {@link #TEXT_LENGTH_THRESHOLD} characters, one per line, to standard output.
     *
     * @param args command-line arguments (unused)
     * @throws IOException if the page cannot be downloaded
     */
    public static void main(String[] args) throws IOException {
        Document doc = Jsoup.connect(PAGE_URL).get();
        Elements links = doc.select("[href]");
        for (Element link : links) {
            String text = link.text();
            if (text.length() > TEXT_LENGTH_THRESHOLD) {
                System.out.println(text);
            }
        }
    }

    /**
     * Collects every double-quoted {@code src} attribute found in the given text.
     *
     * <p>Each returned entry is the <em>whole</em> match, including the attribute
     * name and the quotes (for example {@code src="//host/a.png"}), not just the
     * URL. Callers that need the bare URL have to strip the {@code src="} prefix
     * and the closing quote themselves.
     *
     * @param s HTML (or any text) to scan; {@code null} is treated as empty input
     * @return a new mutable list of the matches in order of appearance; empty
     *         (never {@code null}) when nothing matches
     */
    public static List<String> getImg(String s) {
        List<String> matches = new ArrayList<>();
        if (s == null) {
            // Pattern.matcher(null) would throw; no input simply means no matches.
            return matches;
        }
        Matcher matcher = SRC_ATTRIBUTE.matcher(s);
        while (matcher.find()) {
            matches.add(matcher.group());
        }
        return matches;
    }
}
